RNA-seq report for sample CCR170012_MH17T001P013.
##### Define functions
##### Create 'not in' operator
`%!in%` <- function(x, table) {
  # TRUE for every element of x that has no match in table
  !(x %in% table)
}
##### Prepare object to write into a file
prepare2write <- function (x) {
  # Header of the output table: "Gene" followed by the column names x already carries
  header <- c("Gene", colnames(x))
  # Prepend the row names of x as the first column
  out <- cbind(rownames(x), x)
  colnames(out) <- header
  out
}
##### Combine sample expression profile with reference datasets. This function outputs a vector with first element containing the merged data and second element containing merged targets info
combineDatasets <- function(sample_name, sample_counts, ref_dataset, report_dir = params$report_dir) {
  ##### Arguments:
  #####   sample_name   - name used for the sample column/row in the merged objects
  #####   sample_counts - path to the tab-delimited, header-less read count file (gene ID, count)
  #####   ref_dataset   - path to the tab-delimited file listing reference datasets; it must provide
  #####                   "Target_file" and "Expression_matrix" columns, the first column holds dataset names
  #####   report_dir    - directory for the "<sample_name>.missing_genes.txt" file; defaults to the
  #####                   report parameter 'params$report_dir' and is only evaluated if genes are missing
  ##### Value: list(merged expression data frame, merged targets data frame), samples in the same order

  ##### Read file with reference datasets information
  DatasetInput <- read.table(ref_dataset, sep="\t", as.is=TRUE, header=TRUE, row.names=1)

  if ( nrow(DatasetInput) < 1 ) {
    stop("No reference datasets listed in ", ref_dataset, call. = FALSE)
  }

  ##### Read the target file of the i-th dataset: samples become row names, columns 2-4 are kept
  ##### and the dataset name is appended as the "Dataset" column
  readTargets <- function(i) {
    targets <- read.table(DatasetInput[i,"Target_file"], sep="\t", as.is=TRUE, header=TRUE)[, 1:4]
    rownames(targets) <- targets[,"Sample_name"]
    targets <- cbind(targets[, 2:4], rownames(DatasetInput)[i])
    colnames(targets)[ncol(targets)] <- "Dataset"
    targets
  }

  ##### Stack the target info of all datasets, in the order they are listed
  targetFile <- Reduce(rbind, lapply(seq_len(nrow(DatasetInput)), readTargets))

  ##### Add sample info (the sample is its own group and its own dataset)
  sampleTargetFile <- data.frame(sample_counts, sample_name, NA, sample_name)
  names(sampleTargetFile) <- names(targetFile)
  rownames(sampleTargetFile) <- sample_name
  targetFile <- rbind(targetFile, sampleTargetFile)

  ##### Make syntactically valid names
  rownames(targetFile) <- make.names(rownames(targetFile))

  ##### Read sample read count file and combine it with reference datasets
  datasets.comb <- read.table(sample_counts, sep="\t", as.is=TRUE, header=FALSE, row.names=NULL)
  names(datasets.comb) <- c("", sample_name)

  ##### List genes present in the read count file
  gene_list <- as.vector(datasets.comb[,1])

  ##### Loop through the expression data from different datasets and merge them into one matrix
  for ( data_matrix in DatasetInput[ , "Expression_matrix" ] ) {

    ##### Add data from the reference datasets
    dataset <- as.data.frame( read.table(data_matrix, header=TRUE, sep="\t", row.names=NULL) )

    ##### List genes present in individual files
    gene_list <- c( gene_list, as.vector(dataset[,1]) )

    ##### Merge on the first (gene ID) column; only genes present in both tables are kept
    datasets.comb <- merge( datasets.comb, dataset, by=1, all = FALSE, sort= TRUE)

    ##### Remove per-dataset data to free some memory
    rm(dataset)
  }

  ##### Use gene IDs as rownames
  rownames(datasets.comb) <- datasets.comb[,1]
  datasets.comb <- datasets.comb[, -1]

  ##### Make syntactically valid names
  colnames(datasets.comb) <- make.names(colnames(datasets.comb))

  ##### Make sure that the target file contains info only about samples present in the data matrix
  targetFile <- targetFile[ rownames(targetFile) %in% colnames(datasets.comb), ]

  ##### Make sure that the samples order in the data matrix is the same as in the target file
  ##### (drop = FALSE keeps a data frame even if a single sample remains)
  datasets.comb <- datasets.comb[ , rownames(targetFile), drop = FALSE ]

  ##### Identify genes that were not present across all per-sample files and were omitted in the merged matrix
  gene_list.missing <- setdiff(unique(gene_list), rownames(datasets.comb))

  ##### Write list of missing genes into a file
  if ( length(gene_list.missing) > 0 ) {
    write.table(prepare2write(gene_list.missing), file = file.path(report_dir, paste0(sample_name, ".missing_genes.txt")), sep="\t", quote=FALSE, row.names=TRUE, append = FALSE )
  }

  return( list(datasets.comb, targetFile) )
}
##### Assign colours to different groups
getTargetsColours <- function(targets) {
  ##### Argument: targets - vector of group labels, one per sample
  ##### Value: list( colours per group level (in factor level order), colours per sample )

  ##### Predefined selection of colours for groups
  targets.colours <- c("red","blue","green","darkgoldenrod","darkred","deepskyblue", "coral", "cornflowerblue", "chartreuse4", "bisque4", "chocolate3", "cadetblue3", "darkslategrey", "lightgoldenrod4", "mediumpurple4", "orangered3","indianred1","blueviolet","darkolivegreen4","darkgoldenrod4","firebrick3","deepskyblue4", "coral3", "dodgerblue1", "chartreuse3", "bisque3", "chocolate4", "cadetblue", "darkslategray4", "lightgoldenrod3", "mediumpurple3", "orangered1")

  f.targets <- factor(targets)
  n.levels <- nlevels(f.targets)

  ##### Recycle the palette rather than producing NA colours when there are more groups than colours
  if ( n.levels > length(targets.colours) ) {
    warning("More groups (", n.levels, ") than predefined colours (", length(targets.colours), "); colours are recycled", call. = FALSE)
  }
  vec.targets <- rep_len(targets.colours, n.levels)

  ##### Look up the colour of each sample by its factor level code (works for empty input and NA labels)
  targets.colour <- vec.targets[as.integer(f.targets)]

  return( list(vec.targets, targets.colour) )
}
##### Assign colours to different datasets
getDatasetsColours <- function(datasets) {
  ##### Argument: datasets - vector of dataset labels, one per sample
  ##### Value: list( colours per dataset level (in factor level order), colours per sample )

  ##### Predefined selection of colours for datasets
  datasets.colours <- c("dodgerblue","firebrick","lightslategrey","darkseagreen","orange","darkcyan","bisque", "coral2", "cadetblue3","red","blue","green")

  f.datasets <- factor(datasets)
  n.levels <- nlevels(f.datasets)

  ##### Recycle the palette rather than producing NA colours when there are more datasets than colours
  if ( n.levels > length(datasets.colours) ) {
    warning("More datasets (", n.levels, ") than predefined colours (", length(datasets.colours), "); colours are recycled", call. = FALSE)
  }
  vec.datasets <- rep_len(datasets.colours, n.levels)

  ##### Look up the colour of each sample by its factor level code (works for empty input and NA labels)
  datasets.colour <- vec.datasets[as.integer(f.datasets)]

  return( list(vec.datasets, datasets.colour) )
}
##### Perform PCA. This function outputs a list with dataframe and samples colouring info ready for plotting
pca <- function(data, targets) {
  ##### Arguments:
  #####   data    - expression matrix with genes in rows and samples in columns
  #####   targets - per-sample table providing "Target" and "Dataset" columns
  ##### Value: list with elements "pca.df" (Target, Dataset, PC1-PC3 per sample), "importance_pca"
  #####        (proportion of variance per component as percentage strings), "targets" and "datasets"
  #####        (outputs of getTargetsColours and getDatasetsColours)

  ##### Keep only genes with variance > 0 across all samples
  rsd <- apply(data, 1, sd)
  data.subset <- data[rsd > 0, , drop = FALSE]

  ##### Perform PCA (samples as observations)
  data.subset_pca <- prcomp(t(data.subset), scale. = FALSE)

  ##### Get variance importance for all principal components (row 2 = proportion of variance)
  variance.prop <- summary(data.subset_pca)$importance[2,]
  importance_pca <- paste0(round(100*variance.prop, 2), "%")
  names(importance_pca) <- names(variance.prop)

  ##### Prepare data frame
  data.subset_pca.df <- data.frame(targets$Target, targets$Dataset, data.subset_pca$x[,"PC1"], data.subset_pca$x[,"PC2"], data.subset_pca$x[,"PC3"])
  colnames(data.subset_pca.df) <- c("Target", "Dataset", "PC1", "PC2", "PC3")

  ##### Assign colours to targets and datasets, using the 'targets' argument
  ##### (not a global object that happens to be called 'target')
  targets.colour <- getTargetsColours(targets$Target)
  datasets.colour <- getDatasetsColours(targets$Dataset)

  ##### Create a list with dataframe and samples colouring info
  pca.list <- list(data.subset_pca.df, importance_pca, targets.colour, datasets.colour)
  names(pca.list) <- c("pca.df", "importance_pca", "targets", "datasets")

  return( pca.list )
}
##### Generate cumulative distribution function (CDF) plot for selected gene using sample combined with pancreas expression data
cdfPlot <- function(gene, data, targets, sampleName) {
##### Arguments:
#####   gene       - gene identifier(s) to highlight; flattened with unlist() and matched against row names
#####   data       - expression table indexed by sample column name; must contain the column given by
#####                sampleName and the three hard-coded reference sample columns used below
#####   targets    - not referenced anywhere in the body of this function
#####   sampleName - column of 'data' holding the investigated sample
##### Value: plotly object
##### Subset data for each biological group
##### NOTE(review): each group is represented by one hard-coded sample column - confirm this is intended
sample.expr <- sort(data[, sampleName])
normal.expr <- sort(data[, "SRR1464226"])
PDAC.expr <- sort(data[, "TCGA.IB.AAUO.01A.12R.A38C.07"])
met.expr <- sort(data[, "TCGA.HZ.A9TJ.06A.11R.A41B.07"])
##### Perform range standardization between 0 and 1 (for the cumulative sums)
sample.expr <- sort(sample.expr-min(sample.expr))/(max(sample.expr)-min(sample.expr))
normal.expr <- sort(normal.expr-min(normal.expr))/(max(normal.expr)-min(normal.expr))
PDAC.expr <- sort(PDAC.expr-min(PDAC.expr))/(max(PDAC.expr)-min(PDAC.expr))
met.expr <- sort(met.expr-min(met.expr))/(max(met.expr)-min(met.expr))
##### Calculate cumulative sums and organise the data into data frame (rank index, scaled expression, running sum)
##### Presumably the row names of these data frames are the gene names carried by the sorted vectors - verify
sample.expr.cum <- as.data.frame(cbind(c(1:length(sample.expr)), sample.expr, cumsum(sample.expr)))
normal.expr.cum <- as.data.frame(cbind(c(1:length(normal.expr)), normal.expr, cumsum(normal.expr)))
PDAC.expr.cum <- as.data.frame(cbind(c(1:length(PDAC.expr)), PDAC.expr, cumsum(PDAC.expr)))
met.expr.cum <- as.data.frame(cbind(c(1:length(met.expr)), met.expr, cumsum(met.expr)))
names(sample.expr.cum) <- c("index", "expression", "cumulative_fraction")
names(normal.expr.cum) <- c("index", "expression", "cumulative_fraction")
names(PDAC.expr.cum) <- c("index", "expression", "cumulative_fraction")
names(met.expr.cum) <- c("index", "expression", "cumulative_fraction")
##### Extract expression for selected genes
sample.expr.cum.selected <- sample.expr.cum[ rownames(sample.expr.cum) %in% unlist(gene), ]
normal.expr.cum.selected <- normal.expr.cum[ rownames(normal.expr.cum) %in% unlist(gene), ]
PDAC.expr.cum.selected <- PDAC.expr.cum[ rownames(PDAC.expr.cum) %in% unlist(gene), ]
met.expr.cum.selected <- met.expr.cum[ rownames(met.expr.cum) %in% unlist(gene), ]
##### Generate interactive CDF plot with plotly
##### NOTE(review): every trace uses the rank 'index' as y value although the y-axis is titled
##### "Cumulative fraction"; the "cumulative_fraction" column is never plotted
##### NOTE(review): all add_lines() calls pass rownames(met.expr.cum.selected) as hover text - confirm
p <- plot_ly(sample.expr.cum, x = ~expression, color = I("black")) %>%
##### Add sample data
##### NOTE(review): the trace name is read from the global params$sample, not from the sampleName argument
add_markers(y = sample.expr.cum.selected$index, x = sample.expr.cum.selected$expression,
text = rownames(sample.expr.cum.selected),
name = "Selected genes",
marker = list(size = 9, color = "black"),
showlegend = FALSE) %>%
add_lines(y = sample.expr.cum$index, x = sample.expr.cum$expression,
line = list(color = "grey"),
text = rownames( met.expr.cum.selected),
name = params$sample, showlegend = TRUE) %>%
##### Add normal pancreas data
add_markers(y = normal.expr.cum.selected$index, x = normal.expr.cum.selected$expression,
text = rownames( normal.expr.cum.selected),
name = "Selected genes",
marker = list(size = 9, color = "green"),
showlegend = FALSE) %>%
add_lines(y = normal.expr.cum$index, x = normal.expr.cum$expression, opacity = 0.5,
line = list(color = "green", dash = "dash"),
text = rownames( met.expr.cum.selected),
name = "Pancreas (normal)", showlegend = TRUE) %>%
##### Add PDAC data
add_markers(y = PDAC.expr.cum.selected$index, x = PDAC.expr.cum.selected$expression,
text = rownames( PDAC.expr.cum.selected),
name = "Selected genes",
marker = list(size = 9, color = "coral"),
showlegend = FALSE) %>%
add_lines(y = PDAC.expr.cum$index, x = PDAC.expr.cum$expression, opacity = 0.5,
line = list(color = "coral", dash = "dash"),
text = rownames( met.expr.cum.selected),
name = "PDAC", showlegend = TRUE) %>%
##### Add metastatic PDAC data
add_markers(y = met.expr.cum.selected$index, x = met.expr.cum.selected$expression,
text = rownames( met.expr.cum.selected),
name = "Selected genes",
marker = list(size = 9, color = "red"),
showlegend = FALSE) %>%
add_lines(y = met.expr.cum$index, x = met.expr.cum$expression, opacity = 0.5,
line = list(color = "red", dash = "dash"),
text = rownames( met.expr.cum.selected),
name = "PDAC (metastatic)", showlegend = TRUE) %>%
##### Axis titles, legend placement and an arrow annotation labelling the selected gene(s) on the sample curve
layout(xaxis = list(title = "mRNA expression"),
yaxis = list(title = "Cumulative fraction"),
legend = list(orientation = 'v', x = 0.02, y = 0.9, bgcolor = "transparent"),
annotations = list(x = sample.expr.cum.selected$expression, y = sample.expr.cum.selected$index,
text = rownames(sample.expr.cum.selected), xref = "x", yref = "y", showarrow = TRUE, arrowhead = 0, opacity = 0.5, ax = 50, ay = 50
))
return( p )
}
##### Generate box-plot for selected gene using sample combined with pancreas expression data
boxPlot <- function(gene, data, targets, targets.colour, sampleName, order ) {
  ##### Arguments:
  #####   gene           - identifier of the gene to plot; flattened with unlist() and matched against rownames(data)
  #####   data           - expression table with genes in rows and samples in columns
  #####   targets        - vector of group labels, one per column of 'data'
  #####   targets.colour - list whose first element holds the colours used for the groups
  #####   sampleName     - not used by this function (kept for compatibility with existing calls)
  #####   order          - group labels in the order they should be displayed
  ##### Value: plotly box-plot object

  ##### Locate the selected gene. Exactly one row is required: several rows would be flattened
  ##### gene-by-gene within each sample and silently misaligned with the recycled group labels
  gene.rows <- rownames(data) %in% unlist(gene)

  if ( sum(gene.rows) != 1 ) {
    stop("boxPlot expects exactly one gene present in the expression data; '", paste(unlist(gene), collapse = ", "), "' matched ", sum(gene.rows), " rows", call. = FALSE)
  }

  ##### Extract expression for selected gene
  gene.expr <- data[ gene.rows, ]

  ##### Prepare data frame
  gene.expr.df <- data.frame(targets, as.numeric(gene.expr))
  colnames(gene.expr.df) <- c("Group", "Expression")

  ##### Apply defined group order (groups absent from the data are dropped from the order)
  order <- order[order %in% gene.expr.df$Group]
  gene.expr.df$Group <- factor(gene.expr.df$Group, levels=order)

  p <- plot_ly(gene.expr.df, y= ~Expression, color = ~Group, type = 'box', jitter = 0.3, pointpos = 0, boxpoints = 'all', colors = targets.colour[[1]], width = 800, height = 600) %>%
    layout(yaxis = list( title ="mRNA expression"), margin = list(l=50, r=50, b=50, t=50, pad=4), autosize = FALSE, legend = list(orientation = 'h', y = 1.3), showlegend=TRUE)

  return( p )
}
##### Load libraries
suppressMessages(library(edgeR))
suppressMessages(library(preprocessCore))
suppressMessages(library(rapportools))
suppressMessages(library(plotly))
suppressMessages(library(edgeR))
suppressMessages(library(org.Hs.eg.db))
##### Load reference datasets
##### Create a list with reference datasets
ref_datasets <- c("pancreas")
##### One (initially empty) named slot per reference dataset
ref_datasets.list <- setNames(vector("list", length(ref_datasets)), ref_datasets)
##### Names of the gene lists of interest, again with one named slot each
ref_genes <- c("genes_fusions","genes_pmcc")
ref_genes.list <- setNames(vector("list", length(ref_genes)), ref_genes)
##### Merge the sample counts with the pancreas reference datasets. The result is a list with the merged data as first element and the merged targets info as second element
ref_datasets.list[["pancreas"]] <- combineDatasets(params$sample_name, params$count_file, params$datasets_pancreas)
##### Read in the selected genes lists; the report parameter holding each file path has the same name as the list slot
ref_genes.list[ref_genes] <- lapply(ref_genes, function(genes_param) {
  read.table(params[[genes_param]], sep="\t", as.is=TRUE, header=FALSE, row.names=NULL)
})
##### Data transformation and filtering
##### For differential expression and related analyses, gene expression is rarely considered at the level of raw counts since libraries sequenced at a greater depth will result in higher counts. Rather, it is common practice to transform raw counts onto a scale that accounts for such library size differences. Here we convert the read count data into log2-counts per million (***log-CPM***) using a function from the *[edgeR](https://bioconductor.org/packages/release/bioc/html/edgeR.html)* package. Genes with very low counts across all libraries provide little evidence for differential expression. From a biological point of view, a gene must be expressed at some minimal level before it is likely to be translated into a protein or to be biologically important. In addition, the pronounced discreteness of these counts interferes with some of the statistical approximations that are used later in the pipeline. These genes should be filtered out prior to further analysis.
##### Loop through combined datasets
for ( ref in names(ref_datasets.list) ) {
##### Element 1 holds the merged read counts, element 2 the merged targets info
counts <- ref_datasets.list[[ref]][[1]]
target <- ref_datasets.list[[ref]][[2]]
##### Create EdgeR DGEList object
y <- DGEList(counts=counts, group=target$Target)
##### Add datasets name for each sample
y$samples$dataset <- target$Dataset
##### Filtering to remove low expressed genes. Users should filter with CPM rather than filtering on the counts directly, as the latter does not account for differences in library sizes between samples. Here we keep only genes with CPM above 1 in at least 10% of the samples
keep <- rowSums(cpm(y)>1) >= ncol(counts)/10
y.filtered <- y[keep, , keep.lib.sizes=FALSE]
##### Store the filtered DGEList object as element 3
ref_datasets.list[[ref]][[3]] <- y.filtered
}
# cat("The CPM of 1 (cut-off for removing low expressed genes) corresponds to", round(min(as.numeric(colSums(counts)*1e-6)), digits=0), "reads in sample with the lowest sequencing depth, and", round(max(as.numeric(colSums(counts)*1e-6)), digits=0), "reads in sample with the greatest sequencing depth\n")
##### Report the number of retained genes. 'y.filtered' and 'counts' are the objects left over from the last iteration of the loop above
cat(nrow(y.filtered$counts), "genes remained after filtering low expressed genes, out of the total", nrow(counts), "input genes\n\n")
18325 genes remained after filtering low expressed genes, out of the total 52683 input genes
##### Data normalisation
##### During the sample preparation or sequencing process, external factors that are not of biological interest can affect the expression of individual samples. For example, samples processed in the first batch of an experiment can have higher expression overall when compared to samples processed in a second batch. It is assumed that all samples should have a similar range and distribution of expression values. Normalisation for sample-specific effects is required to ensure that the expression distributions of each sample are similar across the entire experiment. Normalisation by the method of *[trimmed mean of M-values](https://www.ncbi.nlm.nih.gov/pubmed/20196867) (TMM)* is performed using the *calcNormFactors* function in *[edgeR](https://bioconductor.org/packages/release/bioc/html/edgeR.html)*. The normalisation factors calculated here are used as scaling factors for the library sizes. TMM is recommended for most RNA-Seq data where the majority (more than half) of the genes are believed not to be differentially expressed between any pair of the samples.
##### Adjust for RNA composition effect. Calculate scaling factors for the library sizes with calcNormFactors function using trimmed mean of M-values (TMM) between each pair of samples. Note, that the raw read counts are used to calculate the normalisation factors
##### Loop through combined datasets
for ( ref in names(ref_datasets.list) ) {
##### Element 3 holds the filtered DGEList object at this point
y.filtered <- ref_datasets.list[[ref]][[3]]
##### Calculate TMM normalisation factors
y.filtered.norm <- calcNormFactors(y.filtered, method = "TMM")
##### Transformations from the raw-scale to log-CPM (log=TRUE) using the normalised library sizes and a prior count of 0.25
y.filtered.norm.cpm <- cpm(y.filtered.norm, normalized.lib.sizes=TRUE, log=TRUE, prior.count=0.25)
##### Element 3 is overwritten: from here on it holds the log-CPM values instead of the DGEList object
ref_datasets.list[[ref]][[3]] <- y.filtered.norm.cpm
}
##### Principal component analysis (PCA)
##### Loop through combined datasets and perform PCA
for ( ref in names(ref_datasets.list) ) {
##### Element 2 holds the targets info, element 3 the normalised log-CPM values
target <- ref_datasets.list[[ref]][[2]]
data <- ref_datasets.list[[ref]][[3]]
##### Store the list returned by pca() (data frame, variance importance and colouring info) as element 4
ref_datasets.list[[ref]][[4]] <- pca(data, target)
}
##### Loop through combined datasets and annotate genes
for ( ref in names(ref_datasets.list) ) {
##### Convert data into a data frame to make the Ensembl ID and gene symbol matches (with merge function)
data <- ref_datasets.list[[ref]][[3]]
data.df <- as.data.frame(cbind(rownames(data), data))
colnames(data.df)[1] <- "ENSEMBL"
##### Extract the gene annotation information, using the row names of the data as ENSEMBL keys
gene_info <- AnnotationDbi::select(org.Hs.eg.db, rownames(data), c("ENTREZID", "GENENAME", "SYMBOL", "ENSEMBL"), 'ENSEMBL')
##### Merge the gene annotation with expression data
data.annot <- merge(gene_info, data.df, by = "ENSEMBL", all.x = TRUE)
##### Keep only genes for which gene symbol is available
data.annot <- data.annot[!(is.na(data.annot$SYMBOL) | data.annot$SYMBOL==""), ]
##### Remove rows with duplicated gene symbols (the first occurrence of each symbol is kept)
data.annot = data.annot[!duplicated(data.annot$SYMBOL),]
rownames(data.annot) <- data.annot$SYMBOL
##### Get data matrix with gene symbols
#data <- data.annot[, colnames(data)]
#data <- apply(data.annot[, colnames(data)], 2, as.numeric)
#rownames(data) <- data.annot$SYMBOL
##### Element 3 is overwritten with the expression values converted back to numeric and indexed by gene symbol
ref_datasets.list[[ref]][[3]] <- apply(data.annot[, colnames(data)], 2, as.numeric)
rownames(ref_datasets.list[[ref]][[3]]) <- data.annot$SYMBOL
##### Element 5 holds the annotation columns, with gene symbols as row names
ref_datasets.list[[ref]][[5]] <- data.annot[, c("ENTREZID", "GENENAME", "SYMBOL", "ENSEMBL")]
}
The expression profiles of detected fusion genes across pancreas-related lesions/tissues.
Empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level of ALB in the context of the overall mRNA expression distribution in sample CCR170012_MH17T001P013 and samples representing various pancreas-related lesions/tissues.
suppressMessages(library(plotly))
##### Collect the inputs for the CDF plot from the "pancreas" entry of ref_datasets.list
ref <- "pancreas"
##### Element 2: targets info, element 3: expression values indexed by gene symbol, element 4: PCA list with the colouring info
target <- ref_datasets.list[[ref]][[2]]
data <- ref_datasets.list[[ref]][[3]]
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets
##### Select row 1 of the fusion genes table and generate the CDF plot for it
gene <- ref_genes.list[["genes_fusions"]][1,]
cdfPlot(gene, data, target, params$sample_name)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Box-plot illustrating mRNA expression level of ALB (y-axis) in sample CCR170012_MH17T001P013 and its expression distribution in samples representing various pancreas-related lesions/tissues (x-axis).
suppressMessages(library(plotly))
##### Keep only the group label of each sample. Note that 'target' is overwritten: it no longer holds the targets table after this line
target <- target$Target
##### Group order passed to boxPlot: the investigated sample first, followed by the reference groups
order = c(params$sample_name, "Pancreas (normal)", "Pancreas (normal adjacent)", "IPMN", "PanNET", "PDAC", "PDAC (metastatic)")
##### Generate box-plot for selected gene using sample combined with pancreas expression data
boxPlot(gene, data, target, targets.colour, params$sample_name, order )
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Table with ALB annotation and mRNA expression levels in sample CCR170012_MH17T001P013 and average mRNA expression across samples representing different pancreas-related lesions and tissues.
TO BE COMPLETED…
##### Display the annotation (ENTREZID, GENENAME, SYMBOL, ENSEMBL) of the selected gene; element 5 is indexed by gene symbol
DT::datatable( data = ref_datasets.list[[ref]][[5]][gene, ], filter = "none", options = list(pageLength = 1) )
Empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level of APOA1 in the context of the overall mRNA expression distribution in sample CCR170012_MH17T001P013 and samples representing various pancreas-related lesions/tissues.
suppressMessages(library(plotly))
##### Collect the inputs for the CDF plot from the "pancreas" entry of ref_datasets.list
ref <- "pancreas"
##### Element 2: targets info, element 3: expression values indexed by gene symbol, element 4: PCA list with the colouring info
target <- ref_datasets.list[[ref]][[2]]
data <- ref_datasets.list[[ref]][[3]]
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets
##### Select row 2 of the fusion genes table and generate the CDF plot for it
gene <- ref_genes.list[["genes_fusions"]][2,]
cdfPlot(gene, data, target, params$sample_name)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Box-plot illustrating mRNA expression level of APOA1 (y-axis) in sample CCR170012_MH17T001P013 and its expression distribution in samples representing various pancreas-related lesions/tissues (x-axis).
suppressMessages(library(plotly))
##### Keep only the group label of each sample. Note that 'target' is overwritten: it no longer holds the targets table after this line
target <- target$Target
##### Group order passed to boxPlot: the investigated sample first, followed by the reference groups
order = c(params$sample_name, "Pancreas (normal)", "Pancreas (normal adjacent)", "IPMN", "PanNET", "PDAC", "PDAC (metastatic)")
##### Generate box-plot for selected gene using sample combined with pancreas expression data
boxPlot(gene, data, target, targets.colour, params$sample_name, order )
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Table with APOA1 annotation and mRNA expression levels in sample CCR170012_MH17T001P013 and average mRNA expression across samples representing different pancreas-related lesions and tissues.
TO BE COMPLETED…
##### Display the annotation (ENTREZID, GENENAME, SYMBOL, ENSEMBL) of the selected gene; element 5 is indexed by gene symbol
DT::datatable( data = ref_datasets.list[[ref]][[5]][gene, ], filter = "none", options = list(pageLength = 1) )
Empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level of C3 in the context of the overall mRNA expression distribution in sample CCR170012_MH17T001P013 and samples representing various pancreas-related lesions/tissues.
suppressMessages(library(plotly))
##### Collect the inputs for the CDF plot from the "pancreas" entry of ref_datasets.list
ref <- "pancreas"
##### Element 2: targets info, element 3: expression values indexed by gene symbol, element 4: PCA list with the colouring info
target <- ref_datasets.list[[ref]][[2]]
data <- ref_datasets.list[[ref]][[3]]
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets
##### Select row 3 of the fusion genes table and generate the CDF plot for it
gene <- ref_genes.list[["genes_fusions"]][3,]
cdfPlot(gene, data, target, params$sample_name)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Box-plot illustrating mRNA expression level of C3 (y-axis) in sample CCR170012_MH17T001P013 and its expression distribution in samples representing various pancreas-related lesions/tissues (x-axis).
suppressMessages(library(plotly))
##### Keep only the group label of each sample. Note that 'target' is overwritten: it no longer holds the targets table after this line
target <- target$Target
##### Group order passed to boxPlot: the investigated sample first, followed by the reference groups
order = c(params$sample_name, "Pancreas (normal)", "Pancreas (normal adjacent)", "IPMN", "PanNET", "PDAC", "PDAC (metastatic)")
##### Generate box-plot for selected gene using sample combined with pancreas expression data
boxPlot(gene, data, target, targets.colour, params$sample_name, order )
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Table with C3 annotation and mRNA expression levels in sample CCR170012_MH17T001P013 and average mRNA expression across samples representing different pancreas-related lesions and tissues.
TO BE COMPLETED…
##### Display the annotation (ENTREZID, GENENAME, SYMBOL, ENSEMBL) of the selected gene; element 5 is indexed by gene symbol
DT::datatable( data = ref_datasets.list[[ref]][[5]][gene, ], filter = "none", options = list(pageLength = 1) )
Empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level of C4A in the context of the overall mRNA expression distribution in sample CCR170012_MH17T001P013 and samples representing various pancreas-related lesions/tissues.
suppressMessages(library(plotly))
##### Collect the inputs for the CDF plot from the "pancreas" entry of ref_datasets.list
ref <- "pancreas"
##### Element 2: targets info, element 3: expression values indexed by gene symbol, element 4: PCA list with the colouring info
target <- ref_datasets.list[[ref]][[2]]
data <- ref_datasets.list[[ref]][[3]]
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets
##### Select row 4 of the fusion genes table and generate the CDF plot for it
gene <- ref_genes.list[["genes_fusions"]][4,]
cdfPlot(gene, data, target, params$sample_name)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Box-plot illustrating mRNA expression level of C4A (y-axis) in sample CCR170012_MH17T001P013 and its expression distribution in samples representing various pancreas-related lesions/tissues (x-axis).
suppressMessages(library(plotly))
##### Keep only the group label of each sample. Note that 'target' is overwritten: it no longer holds the targets table after this line
target <- target$Target
##### Group order passed to boxPlot: the investigated sample first, followed by the reference groups
order = c(params$sample_name, "Pancreas (normal)", "Pancreas (normal adjacent)", "IPMN", "PanNET", "PDAC", "PDAC (metastatic)")
##### Generate box-plot for selected gene using sample combined with pancreas expression data
boxPlot(gene, data, target, targets.colour, params$sample_name, order )
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Table with C4A annotation and mRNA expression levels in sample CCR170012_MH17T001P013 and average mRNA expression across samples representing different pancreas-related lesions and tissues.
TO BE COMPLETED…
##### Display the annotation (ENTREZID, GENENAME, SYMBOL, ENSEMBL) of the selected gene; element 5 is indexed by gene symbol
DT::datatable( data = ref_datasets.list[[ref]][[5]][gene, ], filter = "none", options = list(pageLength = 1) )
Empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level of FGG in the context of the overall mRNA expression distribution in sample CCR170012_MH17T001P013 and samples representing various pancreas-related lesions/tissues.
suppressMessages(library(plotly))
##### Collect the inputs for the CDF plot from the "pancreas" entry of ref_datasets.list
ref <- "pancreas"
##### Element 2: targets info, element 3: expression values indexed by gene symbol, element 4: PCA list with the colouring info
target <- ref_datasets.list[[ref]][[2]]
data <- ref_datasets.list[[ref]][[3]]
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets
##### Select row 5 of the fusion genes table and generate the CDF plot for it
gene <- ref_genes.list[["genes_fusions"]][5,]
cdfPlot(gene, data, target, params$sample_name)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Box-plot illustrating mRNA expression level of FGG (y-axis) in sample CCR170012_MH17T001P013 and its expression distribution in samples representing various pancreas-related lesions/tissues (x-axis).
suppressMessages(library(plotly))
##### Keep only the group label of each sample. Note that 'target' is overwritten: it no longer holds the targets table after this line
target <- target$Target
##### Group order passed to boxPlot: the investigated sample first, followed by the reference groups
order = c(params$sample_name, "Pancreas (normal)", "Pancreas (normal adjacent)", "IPMN", "PanNET", "PDAC", "PDAC (metastatic)")
##### Generate box-plot for selected gene using sample combined with pancreas expression data
boxPlot(gene, data, target, targets.colour, params$sample_name, order )
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Table with FGG annotation and mRNA expression levels in sample CCR170012_MH17T001P013 and average mRNA expression across samples representing different pancreas-related lesions and tissues.
TO BE COMPLETED…
##### Display the annotation (ENTREZID, GENENAME, SYMBOL, ENSEMBL) of the selected gene; element 5 is indexed by gene symbol
DT::datatable( data = ref_datasets.list[[ref]][[5]][gene, ], filter = "none", options = list(pageLength = 1) )
Empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level of GC in the context of the overall mRNA expression distribution in sample CCR170012_MH17T001P013 and samples representing various pancreas-related lesions/tissues.
suppressMessages(library(plotly))
##### Collect the inputs for the CDF plot from the "pancreas" entry of ref_datasets.list
ref <- "pancreas"
##### Element 2: targets info, element 3: expression values indexed by gene symbol, element 4: PCA list with the colouring info
target <- ref_datasets.list[[ref]][[2]]
data <- ref_datasets.list[[ref]][[3]]
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets
##### Select row 6 of the fusion genes table and generate the CDF plot for it
gene <- ref_genes.list[["genes_fusions"]][6,]
cdfPlot(gene, data, target, params$sample_name)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Box-plot illustrating mRNA expression level of GC (y-axis) in sample CCR170012_MH17T001P013 and its expression distribution in samples representing various pancreas-related lesions/tissues (x-axis).
suppressMessages(library(plotly))
##### Keep only the group label of each sample. Note that 'target' is overwritten: it no longer holds the targets table after this line
target <- target$Target
##### Group order passed to boxPlot: the investigated sample first, followed by the reference groups
order = c(params$sample_name, "Pancreas (normal)", "Pancreas (normal adjacent)", "IPMN", "PanNET", "PDAC", "PDAC (metastatic)")
##### Generate box-plot for selected gene using sample combined with pancreas expression data
boxPlot(gene, data, target, targets.colour, params$sample_name, order )
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Table with GC annotation and mRNA expression levels in sample CCR170012_MH17T001P013 and average mRNA expression across samples representing different pancreas-related lesions and tissues.
TO BE COMPLETED…
##### Display the annotation (ENTREZID, GENENAME, SYMBOL, ENSEMBL) of the selected gene; element 5 is indexed by gene symbol
DT::datatable( data = ref_datasets.list[[ref]][[5]][gene, ], filter = "none", options = list(pageLength = 1) )
Empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level of HP in the context of the overall mRNA expression distribution in sample CCR170012_MH17T001P013 and samples representing various pancreas-related lesions/tissues.
suppressMessages(library(plotly))
##### Collect the inputs for the CDF plot from the "pancreas" entry of ref_datasets.list
ref <- "pancreas"
##### Element 2: targets info, element 3: expression values indexed by gene symbol, element 4: PCA list with the colouring info
target <- ref_datasets.list[[ref]][[2]]
data <- ref_datasets.list[[ref]][[3]]
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets
##### Select row 7 of the fusion genes table and generate the CDF plot for it
gene <- ref_genes.list[["genes_fusions"]][7,]
cdfPlot(gene, data, target, params$sample_name)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Box-plot illustrating mRNA expression level of HP (y-axis) in sample CCR170012_MH17T001P013 and its expression distribution in samples representing various pancreas-related lesions/tissues (x-axis).
suppressMessages(library(plotly))
##### Keep only the group label of each sample. Note that 'target' is overwritten: it no longer holds the targets table after this line
target <- target$Target
##### Group order passed to boxPlot: the investigated sample first, followed by the reference groups
order = c(params$sample_name, "Pancreas (normal)", "Pancreas (normal adjacent)", "IPMN", "PanNET", "PDAC", "PDAC (metastatic)")
##### Generate box-plot for selected gene using sample combined with pancreas expression data
boxPlot(gene, data, target, targets.colour, params$sample_name, order )
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Table with HP annotation and mRNA expression levels in sample CCR170012_MH17T001P013 and average mRNA expression across samples representing different pancreas-related lesions and tissues.
TO BE COMPLETED…
##### Display the annotation (ENTREZID, GENENAME, SYMBOL, ENSEMBL) of the selected gene; element 5 is indexed by gene symbol
DT::datatable( data = ref_datasets.list[[ref]][[5]][gene, ], filter = "none", options = list(pageLength = 1) )
The expression profiles of genes included in the Peter Mac comprehensive cancer panel (PMCC) across pancreas-related lesions/tissues.
Table presenting mRNA expression levels of PMCC panel genes in sample CCR170012_MH17T001P013 and samples representing various pancreas-related lesions/tissues.
TO BE COMPLETED…
Empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level of KRAS in the context of the overall mRNA expression distribution in sample CCR170012_MH17T001P013 and samples representing various pancreas-related lesions/tissues.
suppressMessages(library(plotly))
##### Collect the inputs for the CDF plot from the "pancreas" entry of ref_datasets.list
ref <- "pancreas"
##### Element 2: targets info, element 3: expression values indexed by gene symbol, element 4: PCA list with the colouring info
target <- ref_datasets.list[[ref]][[2]]
data <- ref_datasets.list[[ref]][[3]]
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets
##### Select row 1 of the PMCC panel genes table and generate the CDF plot for it
gene <- ref_genes.list[["genes_pmcc"]][1,]
cdfPlot(gene, data, target, params$sample_name)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Box-plot illustrating mRNA expression level of KRAS (y-axis) in sample CCR170012_MH17T001P013 and its expression distribution in samples representing various pancreas-related lesions/tissues (x-axis).
##### Attach plotly quietly; it is detached again at the end of this chunk
suppressMessages(library(plotly))
##### Keep only the "Target" column of the target annotation
target <- target$Target
##### Group order handed to boxPlot: the investigated sample first, then the reference groups
order <- c(
  params$sample_name,
  "Pancreas (normal)",
  "Pancreas (normal adjacent)",
  "IPMN",
  "PanNET",
  "PDAC",
  "PDAC (metastatic)"
)
##### Box-plot for the selected gene using the sample combined with pancreas expression data
boxPlot(gene, data, target, targets.colour, params$sample_name, order)
##### plotly clashes with other graphics devices when left attached, so detach it
detach("package:plotly", unload = FALSE)
Table with KRAS annotation and mRNA expression levels in sample CCR170012_MH17T001P013 and average mRNA expression across samples representing different pancreas-related lesions and tissues.
TO BE COMPLETED…
##### Show the row(s) selected by `gene` from the 5th element of the reference dataset entry, one row per page, without column filters
DT::datatable(
  data = ref_datasets.list[[ref]][[5]][gene, ],
  filter = "none",
  options = list(pageLength = 1)
)
Empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level of SMAD4 in the context of the overall mRNA expression distribution in sample CCR170012_MH17T001P013 and samples representing various pancreas-related lesions/tissues.
##### Attach plotly quietly; it is detached again at the end of this chunk
suppressMessages(library(plotly))
##### Work on the pancreas reference dataset entry
ref <- "pancreas"
##### Gene of interest: row 2 of the "genes_pmcc" gene list (SMAD4 according to the accompanying caption)
gene <- ref_genes.list[["genes_pmcc"]][2, ]
##### Expression data (3rd element) and target annotation (2nd element) of the reference entry
data <- ref_datasets.list[[ref]][[3]]
target <- ref_datasets.list[[ref]][[2]]
##### Colour settings (4th element) for datasets and targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
##### Plot the selected gene for the investigated sample (ECDF plot, per the accompanying caption)
cdfPlot(gene, data, target, params$sample_name)
##### plotly clashes with other graphics devices when left attached, so detach it
detach("package:plotly", unload = FALSE)
Box-plot illustrating mRNA expression level of SMAD4 (y-axis) in sample CCR170012_MH17T001P013 and its expression distribution in samples representing various pancreas-related lesions/tissues (x-axis).
##### Attach plotly quietly; it is detached again at the end of this chunk
suppressMessages(library(plotly))
##### Keep only the "Target" column of the target annotation
target <- target$Target
##### Group order handed to boxPlot: the investigated sample first, then the reference groups
order <- c(
  params$sample_name,
  "Pancreas (normal)",
  "Pancreas (normal adjacent)",
  "IPMN",
  "PanNET",
  "PDAC",
  "PDAC (metastatic)"
)
##### Box-plot for the selected gene using the sample combined with pancreas expression data
boxPlot(gene, data, target, targets.colour, params$sample_name, order)
##### plotly clashes with other graphics devices when left attached, so detach it
detach("package:plotly", unload = FALSE)
Table with SMAD4 annotation and mRNA expression levels in sample CCR170012_MH17T001P013 and average mRNA expression across samples representing different pancreas-related lesions and tissues.
TO BE COMPLETED…
##### Show the row(s) selected by `gene` from the 5th element of the reference dataset entry, one row per page, without column filters
DT::datatable(
  data = ref_datasets.list[[ref]][[5]][gene, ],
  filter = "none",
  options = list(pageLength = 1)
)
Empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level of CDKN2A in the context of the overall mRNA expression distribution in sample CCR170012_MH17T001P013 and samples representing various pancreas-related lesions/tissues.
##### Attach plotly quietly; it is detached again at the end of this chunk
suppressMessages(library(plotly))
##### Work on the pancreas reference dataset entry
ref <- "pancreas"
##### Gene of interest: row 3 of the "genes_pmcc" gene list (CDKN2A according to the accompanying caption)
gene <- ref_genes.list[["genes_pmcc"]][3, ]
##### Expression data (3rd element) and target annotation (2nd element) of the reference entry
data <- ref_datasets.list[[ref]][[3]]
target <- ref_datasets.list[[ref]][[2]]
##### Colour settings (4th element) for datasets and targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
##### Plot the selected gene for the investigated sample (ECDF plot, per the accompanying caption)
cdfPlot(gene, data, target, params$sample_name)
##### plotly clashes with other graphics devices when left attached, so detach it
detach("package:plotly", unload = FALSE)
Box-plot illustrating mRNA expression level of CDKN2A (y-axis) in sample CCR170012_MH17T001P013 and its expression distribution in samples representing various pancreas-related lesions/tissues (x-axis).
##### Attach plotly quietly; it is detached again at the end of this chunk
suppressMessages(library(plotly))
##### Keep only the "Target" column of the target annotation
target <- target$Target
##### Group order handed to boxPlot: the investigated sample first, then the reference groups
order <- c(
  params$sample_name,
  "Pancreas (normal)",
  "Pancreas (normal adjacent)",
  "IPMN",
  "PanNET",
  "PDAC",
  "PDAC (metastatic)"
)
##### Box-plot for the selected gene using the sample combined with pancreas expression data
boxPlot(gene, data, target, targets.colour, params$sample_name, order)
##### plotly clashes with other graphics devices when left attached, so detach it
detach("package:plotly", unload = FALSE)
Table with CDKN2A annotation and mRNA expression levels in sample CCR170012_MH17T001P013 and average mRNA expression across samples representing different pancreas-related lesions and tissues.
TO BE COMPLETED…
##### Show the row(s) selected by `gene` from the 5th element of the reference dataset entry, one row per page, without column filters
DT::datatable(
  data = ref_datasets.list[[ref]][[5]][gene, ],
  filter = "none",
  options = list(pageLength = 1)
)
Empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level of TP53 in the context of the overall mRNA expression distribution in sample CCR170012_MH17T001P013 and samples representing various pancreas-related lesions/tissues.
##### Attach plotly quietly; it is detached again at the end of this chunk
suppressMessages(library(plotly))
##### Work on the pancreas reference dataset entry
ref <- "pancreas"
##### Gene of interest: row 4 of the "genes_pmcc" gene list (TP53 according to the accompanying caption)
gene <- ref_genes.list[["genes_pmcc"]][4, ]
##### Expression data (3rd element) and target annotation (2nd element) of the reference entry
data <- ref_datasets.list[[ref]][[3]]
target <- ref_datasets.list[[ref]][[2]]
##### Colour settings (4th element) for datasets and targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
##### Plot the selected gene for the investigated sample (ECDF plot, per the accompanying caption)
cdfPlot(gene, data, target, params$sample_name)
##### plotly clashes with other graphics devices when left attached, so detach it
detach("package:plotly", unload = FALSE)
Box-plot illustrating mRNA expression level of TP53 (y-axis) in sample CCR170012_MH17T001P013 and its expression distribution in samples representing various pancreas-related lesions/tissues (x-axis).
##### Attach plotly quietly; it is detached again at the end of this chunk
suppressMessages(library(plotly))
##### Keep only the "Target" column of the target annotation
target <- target$Target
##### Group order handed to boxPlot: the investigated sample first, then the reference groups
order <- c(
  params$sample_name,
  "Pancreas (normal)",
  "Pancreas (normal adjacent)",
  "IPMN",
  "PanNET",
  "PDAC",
  "PDAC (metastatic)"
)
##### Box-plot for the selected gene using the sample combined with pancreas expression data
boxPlot(gene, data, target, targets.colour, params$sample_name, order)
##### plotly clashes with other graphics devices when left attached, so detach it
detach("package:plotly", unload = FALSE)
Table with TP53 annotation and mRNA expression levels in sample CCR170012_MH17T001P013 and average mRNA expression across samples representing different pancreas-related lesions and tissues.
TO BE COMPLETED…
##### Show the row(s) selected by `gene` from the 5th element of the reference dataset entry, one row per page, without column filters
DT::datatable(
  data = ref_datasets.list[[ref]][[5]][gene, ],
  filter = "none",
  options = list(pageLength = 1)
)
Empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level of MYC in the context of the overall mRNA expression distribution in sample CCR170012_MH17T001P013 and samples representing various pancreas-related lesions/tissues.
##### Attach plotly quietly; it is detached again at the end of this chunk
suppressMessages(library(plotly))
##### Work on the pancreas reference dataset entry
ref <- "pancreas"
##### Gene of interest: row 5 of the "genes_pmcc" gene list (MYC according to the accompanying caption)
gene <- ref_genes.list[["genes_pmcc"]][5, ]
##### Expression data (3rd element) and target annotation (2nd element) of the reference entry
data <- ref_datasets.list[[ref]][[3]]
target <- ref_datasets.list[[ref]][[2]]
##### Colour settings (4th element) for datasets and targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
##### Plot the selected gene for the investigated sample (ECDF plot, per the accompanying caption)
cdfPlot(gene, data, target, params$sample_name)
##### plotly clashes with other graphics devices when left attached, so detach it
detach("package:plotly", unload = FALSE)
Box-plot illustrating mRNA expression level of MYC (y-axis) in sample CCR170012_MH17T001P013 and its expression distribution in samples representing various pancreas-related lesions/tissues (x-axis).
##### Attach plotly quietly; it is detached again at the end of this chunk
suppressMessages(library(plotly))
##### Keep only the "Target" column of the target annotation
target <- target$Target
##### Group order handed to boxPlot: the investigated sample first, then the reference groups
order <- c(
  params$sample_name,
  "Pancreas (normal)",
  "Pancreas (normal adjacent)",
  "IPMN",
  "PanNET",
  "PDAC",
  "PDAC (metastatic)"
)
##### Box-plot for the selected gene using the sample combined with pancreas expression data
boxPlot(gene, data, target, targets.colour, params$sample_name, order)
##### plotly clashes with other graphics devices when left attached, so detach it
detach("package:plotly", unload = FALSE)
Table with MYC annotation and mRNA expression levels in sample CCR170012_MH17T001P013 and average mRNA expression across samples representing different pancreas-related lesions and tissues.
TO BE COMPLETED…
##### Show the row(s) selected by `gene` from the 5th element of the reference dataset entry, one row per page, without column filters
DT::datatable(
  data = ref_datasets.list[[ref]][[5]][gene, ],
  filter = "none",
  options = list(pageLength = 1)
)
##### Print every report parameter with its value(s); multiple values are joined with commas.
##### seq_along() is used instead of 1:length(params) so that an empty parameter list
##### prints nothing (1:0 would iterate over c(1, 0) and emit bogus entries).
for (i in seq_along(params)) {
  cat(paste0(
    "Parameter: ", names(params)[i],
    "\nValue: ", paste(unlist(params[i]), collapse = ","),
    "\n\n"
  ))
}
Parameter: datasets_pancreas
Value: ../data/Datasets_list_pancreas.txt
Parameter: genes_fusions
Value: ../data/Genes_fusions.txt
Parameter: genes_pmcc
Value: ../data/Genes_PMCC.txt
Parameter: report_dir
Value: ../reports
Parameter: sample_name
Value: CCR170012_MH17T001P013
Parameter: count_file
Value: ../data/CCR170012_MH17T001P013-ready.counts
##### Print R session details (R version, platform, locale settings and package versions) so the report can be reproduced
devtools::session_info()
## Session info -------------------------------------------------------------
## setting value
## version R version 3.5.0 (2018-04-23)
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_AU.UTF-8
## tz Australia/Melbourne
## date 2018-09-27
## Packages -----------------------------------------------------------------
## package * version date source
## AnnotationDbi * 1.42.1 2018-05-08 Bioconductor
## assertthat 0.2.0 2017-04-11 CRAN (R 3.5.0)
## backports 1.1.2 2017-12-13 CRAN (R 3.5.0)
## base * 3.5.0 2018-04-24 local
## bindr 0.1.1 2018-03-13 CRAN (R 3.5.0)
## bindrcpp * 0.2.2 2018-03-29 CRAN (R 3.5.0)
## Biobase * 2.40.0 2018-05-01 Bioconductor
## BiocGenerics * 0.26.0 2018-05-01 Bioconductor
## bit 1.1-14 2018-05-29 CRAN (R 3.5.0)
## bit64 0.9-7 2017-05-08 CRAN (R 3.5.0)
## blob 1.1.1 2018-03-25 CRAN (R 3.5.0)
## colorspace 1.3-2 2016-12-14 CRAN (R 3.5.0)
## compiler 3.5.0 2018-04-24 local
## crayon 1.3.4 2017-09-16 CRAN (R 3.5.0)
## crosstalk 1.0.0 2016-12-21 CRAN (R 3.5.0)
## data.table 1.11.4 2018-05-27 CRAN (R 3.5.0)
## datasets * 3.5.0 2018-04-24 local
## DBI 1.0.0 2018-05-02 CRAN (R 3.5.0)
## devtools 1.13.6 2018-06-27 CRAN (R 3.5.0)
## digest 0.6.16 2018-08-22 CRAN (R 3.5.0)
## dplyr 0.7.6 2018-06-29 CRAN (R 3.5.1)
## DT 0.4 2018-01-30 CRAN (R 3.5.0)
## edgeR * 3.22.4 2018-09-23 Bioconductor
## evaluate 0.11 2018-07-17 CRAN (R 3.5.0)
## getopt 1.20.2 2018-02-16 CRAN (R 3.5.0)
## ggplot2 * 3.0.0 2018-07-03 CRAN (R 3.5.0)
## glue 1.3.0 2018-07-17 CRAN (R 3.5.0)
## graphics * 3.5.0 2018-04-24 local
## grDevices * 3.5.0 2018-04-24 local
## grid 3.5.0 2018-04-24 local
## gtable 0.2.0 2016-02-26 CRAN (R 3.5.0)
## htmltools 0.3.6 2017-04-28 CRAN (R 3.5.0)
## htmlwidgets 1.2 2018-04-19 CRAN (R 3.5.0)
## httpuv 1.4.5 2018-07-19 CRAN (R 3.5.0)
## httr 1.3.1 2017-08-20 CRAN (R 3.5.0)
## IRanges * 2.14.12 2018-09-20 Bioconductor
## jsonlite 1.5 2017-06-01 CRAN (R 3.5.0)
## knitr 1.20 2018-02-20 CRAN (R 3.5.0)
## later 0.7.4 2018-08-31 CRAN (R 3.5.0)
## lattice 0.20-35 2017-03-25 CRAN (R 3.5.0)
## lazyeval 0.2.1 2017-10-29 CRAN (R 3.5.0)
## limma * 3.36.5 2018-09-20 Bioconductor
## locfit 1.5-9.1 2013-04-20 CRAN (R 3.5.0)
## magrittr 1.5 2014-11-22 CRAN (R 3.5.0)
## memoise 1.1.0 2017-04-21 CRAN (R 3.5.0)
## methods * 3.5.0 2018-04-24 local
## mime 0.5 2016-07-07 CRAN (R 3.5.0)
## munsell 0.5.0 2018-06-12 CRAN (R 3.5.0)
## optparse * 1.6.0 2018-06-17 CRAN (R 3.5.0)
## org.Hs.eg.db * 3.6.0 2018-08-08 Bioconductor
## pander 0.6.2 2018-07-08 CRAN (R 3.5.0)
## parallel * 3.5.0 2018-04-24 local
## pillar 1.3.0 2018-07-14 CRAN (R 3.5.0)
## pkgconfig 2.0.2 2018-08-16 CRAN (R 3.5.0)
## plotly 4.8.0 2018-07-20 CRAN (R 3.5.0)
## plyr 1.8.4 2016-06-08 CRAN (R 3.5.0)
## preprocessCore * 1.42.0 2018-05-01 Bioconductor
## promises 1.0.1 2018-04-13 CRAN (R 3.5.0)
## purrr 0.2.5 2018-05-29 CRAN (R 3.5.0)
## R6 2.2.2 2017-06-17 CRAN (R 3.5.0)
## rapportools * 1.0 2014-01-07 CRAN (R 3.5.0)
## Rcpp 0.12.18 2018-07-23 CRAN (R 3.5.0)
## reshape * 0.8.7 2017-08-06 CRAN (R 3.5.0)
## rlang 0.2.2 2018-08-16 CRAN (R 3.5.0)
## rmarkdown 1.10 2018-06-11 CRAN (R 3.5.0)
## rprojroot 1.3-2 2018-01-03 CRAN (R 3.5.0)
## RSQLite 2.1.1 2018-05-06 CRAN (R 3.5.0)
## S4Vectors * 0.18.3 2018-06-08 Bioconductor
## scales 1.0.0 2018-08-09 CRAN (R 3.5.0)
## shiny 1.1.0 2018-05-17 CRAN (R 3.5.0)
## stats * 3.5.0 2018-04-24 local
## stats4 * 3.5.0 2018-04-24 local
## stringi 1.2.4 2018-07-20 CRAN (R 3.5.0)
## stringr 1.3.1 2018-05-10 CRAN (R 3.5.0)
## tibble 1.4.2 2018-01-22 CRAN (R 3.5.0)
## tidyr 0.8.1 2018-05-18 CRAN (R 3.5.0)
## tidyselect 0.2.4 2018-02-26 CRAN (R 3.5.0)
## tools 3.5.0 2018-04-24 local
## utils * 3.5.0 2018-04-24 local
## viridisLite 0.3.0 2018-02-01 CRAN (R 3.5.0)
## withr 2.1.2 2018-03-15 CRAN (R 3.5.0)
## xtable 1.8-3 2018-08-29 CRAN (R 3.5.0)
## yaml 2.2.0 2018-07-25 CRAN (R 3.5.0)